library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

Life Expectancy Dataset

Life Expectancy dataset from Our World in Data.

# Load the raw life-expectancy CSV as a tibble; readr guesses the column types.
le_df <- readr::read_csv(file = "data/life-expectancy.csv")
## Rows: 20755 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, Period life expectancy at birth - Sex: all - Age: 0
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Compact overview of the raw columns and their types.
le_df %>% glimpse()
## Rows: 20,755
## Columns: 4
## $ Entity                                                <chr> "Afghanistan", "…
## $ Code                                                  <chr> "AFG", "AFG", "A…
## $ Year                                                  <dbl> 1950, 1951, 1952…
## $ `Period life expectancy at birth - Sex: all - Age: 0` <dbl> 27.7275, 27.9634…

Cleaning

# Standardise the column names, then give the long life-expectancy column a
# short name that is easier to reference later on.
le_clean <- le_df %>%
  janitor::clean_names() %>%
  rename(period_life_expect = period_life_expectancy_at_birth_sex_all_age_0)
glimpse(le_clean)
## Rows: 20,755
## Columns: 4
## $ entity             <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afgha…
## $ code               <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "A…
## $ year               <dbl> 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 195…
## $ period_life_expect <dbl> 27.7275, 27.9634, 28.4456, 28.9304, 29.2258, 29.920…

Entity

The `entity` column includes countries, continents, the world as a whole, and groups of countries defined by development status and income level.

# List every distinct entity name, in order of first appearance.
unique(le_clean$entity)
##   [1] "Afghanistan"                                                
##   [2] "Africa"                                                     
##   [3] "Albania"                                                    
##   [4] "Algeria"                                                    
##   [5] "American Samoa"                                             
##   [6] "Americas"                                                   
##   [7] "Andorra"                                                    
##   [8] "Angola"                                                     
##   [9] "Anguilla"                                                   
##  [10] "Antigua and Barbuda"                                        
##  [11] "Argentina"                                                  
##  [12] "Armenia"                                                    
##  [13] "Aruba"                                                      
##  [14] "Asia"                                                       
##  [15] "Australia"                                                  
##  [16] "Austria"                                                    
##  [17] "Azerbaijan"                                                 
##  [18] "Bahamas"                                                    
##  [19] "Bahrain"                                                    
##  [20] "Bangladesh"                                                 
##  [21] "Barbados"                                                   
##  [22] "Belarus"                                                    
##  [23] "Belgium"                                                    
##  [24] "Belize"                                                     
##  [25] "Benin"                                                      
##  [26] "Bermuda"                                                    
##  [27] "Bhutan"                                                     
##  [28] "Bolivia"                                                    
##  [29] "Bonaire Sint Eustatius and Saba"                            
##  [30] "Bosnia and Herzegovina"                                     
##  [31] "Botswana"                                                   
##  [32] "Brazil"                                                     
##  [33] "British Virgin Islands"                                     
##  [34] "Brunei"                                                     
##  [35] "Bulgaria"                                                   
##  [36] "Burkina Faso"                                               
##  [37] "Burundi"                                                    
##  [38] "Cambodia"                                                   
##  [39] "Cameroon"                                                   
##  [40] "Canada"                                                     
##  [41] "Cape Verde"                                                 
##  [42] "Cayman Islands"                                             
##  [43] "Central African Republic"                                   
##  [44] "Chad"                                                       
##  [45] "Chile"                                                      
##  [46] "China"                                                      
##  [47] "Colombia"                                                   
##  [48] "Comoros"                                                    
##  [49] "Congo"                                                      
##  [50] "Cook Islands"                                               
##  [51] "Costa Rica"                                                 
##  [52] "Cote d'Ivoire"                                              
##  [53] "Croatia"                                                    
##  [54] "Cuba"                                                       
##  [55] "Curacao"                                                    
##  [56] "Cyprus"                                                     
##  [57] "Czechia"                                                    
##  [58] "Democratic Republic of Congo"                               
##  [59] "Denmark"                                                    
##  [60] "Djibouti"                                                   
##  [61] "Dominica"                                                   
##  [62] "Dominican Republic"                                         
##  [63] "East Timor"                                                 
##  [64] "Ecuador"                                                    
##  [65] "Egypt"                                                      
##  [66] "El Salvador"                                                
##  [67] "England and Wales"                                          
##  [68] "Equatorial Guinea"                                          
##  [69] "Eritrea"                                                    
##  [70] "Estonia"                                                    
##  [71] "Eswatini"                                                   
##  [72] "Ethiopia"                                                   
##  [73] "Europe"                                                     
##  [74] "Falkland Islands"                                           
##  [75] "Faroe Islands"                                              
##  [76] "Fiji"                                                       
##  [77] "Finland"                                                    
##  [78] "France"                                                     
##  [79] "French Guiana"                                              
##  [80] "French Polynesia"                                           
##  [81] "Gabon"                                                      
##  [82] "Gambia"                                                     
##  [83] "Georgia"                                                    
##  [84] "Germany"                                                    
##  [85] "Ghana"                                                      
##  [86] "Gibraltar"                                                  
##  [87] "Greece"                                                     
##  [88] "Greenland"                                                  
##  [89] "Grenada"                                                    
##  [90] "Guadeloupe"                                                 
##  [91] "Guam"                                                       
##  [92] "Guatemala"                                                  
##  [93] "Guernsey"                                                   
##  [94] "Guinea"                                                     
##  [95] "Guinea-Bissau"                                              
##  [96] "Guyana"                                                     
##  [97] "Haiti"                                                      
##  [98] "High-income countries"                                      
##  [99] "Honduras"                                                   
## [100] "Hong Kong"                                                  
## [101] "Hungary"                                                    
## [102] "Iceland"                                                    
## [103] "India"                                                      
## [104] "Indonesia"                                                  
## [105] "Iran"                                                       
## [106] "Iraq"                                                       
## [107] "Ireland"                                                    
## [108] "Isle of Man"                                                
## [109] "Israel"                                                     
## [110] "Italy"                                                      
## [111] "Jamaica"                                                    
## [112] "Japan"                                                      
## [113] "Jersey"                                                     
## [114] "Jordan"                                                     
## [115] "Kazakhstan"                                                 
## [116] "Kenya"                                                      
## [117] "Kiribati"                                                   
## [118] "Kosovo"                                                     
## [119] "Kuwait"                                                     
## [120] "Kyrgyzstan"                                                 
## [121] "Land-locked Developing Countries (LLDC)"                    
## [122] "Laos"                                                       
## [123] "Latin America and the Caribbean"                            
## [124] "Latvia"                                                     
## [125] "Least developed countries"                                  
## [126] "Lebanon"                                                    
## [127] "Lesotho"                                                    
## [128] "Less developed regions"                                     
## [129] "Less developed regions, excluding China"                    
## [130] "Less developed regions, excluding least developed countries"
## [131] "Liberia"                                                    
## [132] "Libya"                                                      
## [133] "Liechtenstein"                                              
## [134] "Lithuania"                                                  
## [135] "Low-income countries"                                       
## [136] "Lower-middle-income countries"                              
## [137] "Luxembourg"                                                 
## [138] "Macao"                                                      
## [139] "Madagascar"                                                 
## [140] "Malawi"                                                     
## [141] "Malaysia"                                                   
## [142] "Maldives"                                                   
## [143] "Mali"                                                       
## [144] "Malta"                                                      
## [145] "Marshall Islands"                                           
## [146] "Martinique"                                                 
## [147] "Mauritania"                                                 
## [148] "Mauritius"                                                  
## [149] "Mayotte"                                                    
## [150] "Mexico"                                                     
## [151] "Micronesia (country)"                                       
## [152] "Middle-income countries"                                    
## [153] "Moldova"                                                    
## [154] "Monaco"                                                     
## [155] "Mongolia"                                                   
## [156] "Montenegro"                                                 
## [157] "Montserrat"                                                 
## [158] "More developed regions"                                     
## [159] "Morocco"                                                    
## [160] "Mozambique"                                                 
## [161] "Myanmar"                                                    
## [162] "Namibia"                                                    
## [163] "Nauru"                                                      
## [164] "Nepal"                                                      
## [165] "Netherlands"                                                
## [166] "New Caledonia"                                              
## [167] "New Zealand"                                                
## [168] "Nicaragua"                                                  
## [169] "Niger"                                                      
## [170] "Nigeria"                                                    
## [171] "Niue"                                                       
## [172] "No income group available"                                  
## [173] "North Korea"                                                
## [174] "North Macedonia"                                            
## [175] "Northern America"                                           
## [176] "Northern Ireland"                                           
## [177] "Northern Mariana Islands"                                   
## [178] "Norway"                                                     
## [179] "Oceania"                                                    
## [180] "Oman"                                                       
## [181] "Pakistan"                                                   
## [182] "Palau"                                                      
## [183] "Palestine"                                                  
## [184] "Panama"                                                     
## [185] "Papua New Guinea"                                           
## [186] "Paraguay"                                                   
## [187] "Peru"                                                       
## [188] "Philippines"                                                
## [189] "Poland"                                                     
## [190] "Portugal"                                                   
## [191] "Puerto Rico"                                                
## [192] "Qatar"                                                      
## [193] "Reunion"                                                    
## [194] "Romania"                                                    
## [195] "Russia"                                                     
## [196] "Rwanda"                                                     
## [197] "Saint Barthelemy"                                           
## [198] "Saint Helena"                                               
## [199] "Saint Kitts and Nevis"                                      
## [200] "Saint Lucia"                                                
## [201] "Saint Martin (French part)"                                 
## [202] "Saint Pierre and Miquelon"                                  
## [203] "Saint Vincent and the Grenadines"                           
## [204] "Samoa"                                                      
## [205] "San Marino"                                                 
## [206] "Sao Tome and Principe"                                      
## [207] "Saudi Arabia"                                               
## [208] "Scotland"                                                   
## [209] "Senegal"                                                    
## [210] "Serbia"                                                     
## [211] "Seychelles"                                                 
## [212] "Sierra Leone"                                               
## [213] "Singapore"                                                  
## [214] "Sint Maarten (Dutch part)"                                  
## [215] "Slovakia"                                                   
## [216] "Slovenia"                                                   
## [217] "Small Island Developing States (SIDS)"                      
## [218] "Solomon Islands"                                            
## [219] "Somalia"                                                    
## [220] "South Africa"                                               
## [221] "South Korea"                                                
## [222] "South Sudan"                                                
## [223] "Spain"                                                      
## [224] "Sri Lanka"                                                  
## [225] "Sudan"                                                      
## [226] "Suriname"                                                   
## [227] "Sweden"                                                     
## [228] "Switzerland"                                                
## [229] "Syria"                                                      
## [230] "Taiwan"                                                     
## [231] "Tajikistan"                                                 
## [232] "Tanzania"                                                   
## [233] "Thailand"                                                   
## [234] "Togo"                                                       
## [235] "Tokelau"                                                    
## [236] "Tonga"                                                      
## [237] "Trinidad and Tobago"                                        
## [238] "Tunisia"                                                    
## [239] "Turkey"                                                     
## [240] "Turkmenistan"                                               
## [241] "Turks and Caicos Islands"                                   
## [242] "Tuvalu"                                                     
## [243] "USSR"                                                       
## [244] "Uganda"                                                     
## [245] "Ukraine"                                                    
## [246] "United Arab Emirates"                                       
## [247] "United Kingdom"                                             
## [248] "United States"                                              
## [249] "United States Virgin Islands"                               
## [250] "Upper-middle-income countries"                              
## [251] "Uruguay"                                                    
## [252] "Uzbekistan"                                                 
## [253] "Vanuatu"                                                    
## [254] "Venezuela"                                                  
## [255] "Vietnam"                                                    
## [256] "Wallis and Futuna"                                          
## [257] "Western Sahara"                                             
## [258] "World"                                                      
## [259] "Yemen"                                                      
## [260] "Zambia"                                                     
## [261] "Zimbabwe"

Missing values

# Plot the missingness pattern of every column in the cleaned data.
le_clean %>% visdat::vis_miss()

Which entities are missing codes?

Mainly continents and groups of countries defined by income level or development status, plus a few other entities such as England and Wales.

# Distinct entities that have no code attached.
le_clean %>%
  filter(is.na(code)) %>%
  distinct(entity)
## # A tibble: 23 × 1
##    entity                                 
##    <chr>                                  
##  1 Africa                                 
##  2 Americas                               
##  3 Asia                                   
##  4 England and Wales                      
##  5 Europe                                 
##  6 High-income countries                  
##  7 Land-locked Developing Countries (LLDC)
##  8 Latin America and the Caribbean        
##  9 Least developed countries              
## 10 Less developed regions                 
## # ℹ 13 more rows

World Life Expectancy by Year

# Line chart of life expectancy over time for the "World" entity, rendered as
# an interactive plotly widget.
p <- ggplot(
  data = filter(le_clean, entity == "World"),
  mapping = aes(x = year, y = period_life_expect)
) +
  geom_line() +
  labs(
    title = "World Life Expectancy by Year",
    x = "Year",
    y = "Life Expectancy at Birth (years)"
  )
ggplotly(p)

Looking into different development status regions

# Entities whose name contains "devel" in any letter case. Names are
# de-duplicated first so the pattern is matched once per entity.
development_status <- le_clean %>%
  distinct(entity) %>%
  filter(str_detect(entity, regex("devel", ignore_case = TRUE))) %>%
  pull(entity)

# Life expectancy over time, one coloured line per matching entity.
p <- ggplot(
  data = filter(le_clean, entity %in% development_status),
  mapping = aes(x = year, y = period_life_expect, color = entity)
) +
  geom_line() +
  labs(
    title = "Life Expectancy by Year",
    x = "Year",
    y = "Life Expectancy at Birth (years)"
  )
ggplotly(p)

How many countries have data before 1950?

# Number of distinct coded entities with at least one record before 1950.
le_clean %>%
  filter(!is.na(code), year < 1950) %>%
  distinct(entity) %>%
  nrow()
## [1] 87

Only 87 of 238 countries have data before 1950.

# Number of coded entities that have one record for every year the dataset
# covers from 1950 onward. The expected number of years (`n_years`) is derived
# from the data rather than hard-coded, so the check stays correct when the
# dataset is refreshed with additional years.
le_clean %>%
  filter(year >= 1950) %>%
  # Distinct years across all entities (computed before rows without a code
  # are dropped).
  mutate(n_years = n_distinct(year)) %>%
  drop_na(code) %>%
  # `n_years` is constant, so this still yields one row per entity.
  count(entity, n_years) %>%
  filter(n == n_years) %>%
  nrow()
## [1] 237

237 of 238 countries have data every year since 1950.

# Total number of distinct entities that carry a code.
# NOTE(review): a non-missing code is used as a proxy for "country" — confirm
# that aggregates such as "World" do not carry a code of their own.
n_distinct(le_clean$entity[!is.na(le_clean$code)])
## [1] 238

Difference in Life Expectancy

Create a new column that calculates the difference in life expectancy from the previous year.

# Change in life expectancy relative to each entity's previous observation.
# `order_by = year` makes the lag follow calendar order instead of relying on
# the incidental row order of the data; the row order itself is left untouched.
# NOTE(review): if an entity has gaps between observed years, `diff` spans more
# than one calendar year for those rows — confirm this is acceptable.
le_clean <- le_clean %>%
  group_by(entity) %>%
  mutate(
    diff = period_life_expect -
      dplyr::lag(period_life_expect, order_by = year)
  ) %>%
  ungroup()

# Change per observation for the development-status groups. The first record
# of each entity has no predecessor (NA) and is dropped before plotting.
p <- le_clean %>%
  filter(entity %in% development_status) %>%
  drop_na(diff) %>%
  ggplot(aes(
    x = year,
    y = diff,
    color = entity
  )) +
  geom_line() +
  # Dotted horizontal reference line.
  # NOTE(review): 0.243 is an undocumented constant — presumably a typical
  # yearly gain; confirm its origin and derive it from the data if possible.
  geom_hline(yintercept = 0.243, color = "black", linetype = "dotted") +
  labs(
    title = "Life Expectancy Diff by Year",
    x = "Year",
    y = "Change in Life Expectancy at Birth (years)"
  )
ggplotly(p)

Time Series Analysis

Convert dataframes to time series

# Build a named list holding one annual time series per development-status
# entity (one element for every name in `development_status`).
# Rows are sorted by year and the series start is taken from the data, so the
# time index always matches the observations instead of being forced onto a
# fixed 1950-2021 window (which would silently truncate or recycle values if
# an entity covered a different range).
# NOTE(review): assumes each entity has exactly one record per consecutive
# year; `ts()` cannot represent gaps — verify if new entities are added.
le_dev_ls <- development_status %>%
  set_names() %>%
  map(function(dev) {
    dev_df <- le_clean %>%
      filter(entity == dev) %>%
      arrange(year)
    # Keep the one-column data frame so the result stays a one-column
    # matrix time series labelled `period_life_expect`.
    dev_df %>%
      select(period_life_expect) %>%
      ts(start = min(dev_df$year), frequency = 1)
  })
le_dev_ls
## $`Land-locked Developing Countries (LLDC)`
## Time Series:
## Start = 1950 
## End = 2021 
## Frequency = 1 
##       period_life_expect
##  [1,]            39.4836
##  [2,]            39.7985
##  [3,]            40.2089
##  [4,]            40.6129
##  [5,]            40.9987
##  [6,]            41.4098
##  [7,]            41.8012
##  [8,]            40.8860
##  [9,]            41.2702
## [10,]            42.9791
## [11,]            43.3855
## [12,]            43.8382
## [13,]            44.2265
## [14,]            44.4622
## [15,]            45.0320
## [16,]            45.1144
## [17,]            44.7641
## [18,]            45.4924
## [19,]            45.7305
## [20,]            45.9128
## [21,]            46.1592
## [22,]            46.4167
## [23,]            45.8912
## [24,]            46.7321
## [25,]            46.6755
## [26,]            46.9242
## [27,]            47.7069
## [28,]            47.9663
## [29,]            48.1405
## [30,]            48.5353
## [31,]            48.9473
## [32,]            49.4773
## [33,]            49.7332
## [34,]            48.1001
## [35,]            47.9810
## [36,]            48.4865
## [37,]            49.2029
## [38,]            50.5464
## [39,]            49.9685
## [40,]            51.3800
## [41,]            51.2460
## [42,]            51.5154
## [43,]            51.1122
## [44,]            51.0153
## [45,]            48.1917
## [46,]            51.9796
## [47,]            52.1104
## [48,]            52.3150
## [49,]            51.7472
## [50,]            52.9955
## [51,]            53.5791
## [52,]            53.9245
## [53,]            54.5475
## [54,]            55.2809
## [55,]            56.0211
## [56,]            56.6869
## [57,]            57.4662
## [58,]            58.1920
## [59,]            58.9636
## [60,]            59.6964
## [61,]            60.4286
## [62,]            61.1749
## [63,]            61.8342
## [64,]            62.4491
## [65,]            62.9001
## [66,]            63.3501
## [67,]            63.9259
## [68,]            64.2752
## [69,]            64.6310
## [70,]            64.9831
## [71,]            64.0608
## [72,]            63.7099
## 
## $`Least developed countries`
## Time Series:
## Start = 1950 
## End = 2021 
## Frequency = 1 
##       period_life_expect
##  [1,]            36.7606
##  [2,]            37.1166
##  [3,]            37.6441
##  [4,]            38.0314
##  [5,]            38.5840
##  [6,]            39.0524
##  [7,]            39.4372
##  [8,]            39.2478
##  [9,]            39.5448
## [10,]            40.5973
## [11,]            40.9301
## [12,]            41.3303
## [13,]            41.7962
## [14,]            41.9248
## [15,]            42.6287
## [16,]            42.6081
## [17,]            42.9056
## [18,]            43.5074
## [19,]            43.8672
## [20,]            44.1094
## [21,]            42.8667
## [22,]            38.3186
## [23,]            44.5179
## [24,]            45.2031
## [25,]            45.1392
## [26,]            43.7658
## [27,]            44.3664
## [28,]            46.2304
## [29,]            46.6554
## [30,]            47.1651
## [31,]            47.6444
## [32,]            47.9803
## [33,]            48.2630
## [34,]            47.3425
## [35,]            47.3111
## [36,]            47.6962
## [37,]            48.4363
## [38,]            48.9276
## [39,]            48.4161
## [40,]            49.9467
## [41,]            50.1401
## [42,]            49.7829
## [43,]            50.2226
## [44,]            50.8393
## [45,]            49.9143
## [46,]            52.1806
## [47,]            52.3293
## [48,]            52.9811
## [49,]            52.9004
## [50,]            54.3557
## [51,]            55.0574
## [52,]            55.5683
## [53,]            56.0698
## [54,]            56.7153
## [55,]            57.2381
## [56,]            57.8142
## [57,]            58.4518
## [58,]            58.9287
## [59,]            59.1136
## [60,]            60.1000
## [61,]            60.5229
## [62,]            61.3113
## [63,]            61.9123
## [64,]            62.3808
## [65,]            62.8141
## [66,]            63.2189
## [67,]            63.7597
## [68,]            64.1711
## [69,]            64.6089
## [70,]            64.9648
## [71,]            64.4953
## [72,]            64.0715
## 
## $`Less developed regions`
## Time Series:
## Start = 1950 
## End = 2021 
## Frequency = 1 
##       period_life_expect
##  [1,]            41.2643
##  [2,]            42.1096
##  [3,]            43.2437
##  [4,]            43.8704
##  [5,]            44.6902
##  [6,]            45.2476
##  [7,]            45.7751
##  [8,]            46.2070
##  [9,]            46.6697
## [10,]            44.0977
## [11,]            42.1155
## [12,]            45.0834
## [13,]            48.5162
## [14,]            49.1238
## [15,]            49.7630
## [16,]            49.5482
## [17,]            50.2856
## [18,]            50.8350
## [19,]            51.6293
## [20,]            52.1374
## [21,]            52.4443
## [22,]            52.1331
## [23,]            53.6360
## [24,]            54.2855
## [25,]            54.7315
## [26,]            55.0278
## [27,]            55.5499
## [28,]            56.2761
## [29,]            56.7439
## [30,]            57.2830
## [31,]            57.7537
## [32,]            58.2335
## [33,]            58.6889
## [34,]            58.9221
## [35,]            59.3247
## [36,]            59.7419
## [37,]            60.2440
## [38,]            60.6798
## [39,]            60.8465
## [40,]            61.4357
## [41,]            61.6961
## [42,]            61.7999
## [43,]            62.1409
## [44,]            62.4766
## [45,]            62.5023
## [46,]            63.0250
## [47,]            63.2295
## [48,]            63.5679
## [49,]            63.7939
## [50,]            64.2641
## [51,]            64.6705
## [52,]            65.0843
## [53,]            65.4550
## [54,]            65.8258
## [55,]            66.1018
## [56,]            66.5652
## [57,]            67.0074
## [58,]            67.3330
## [59,]            67.5783
## [60,]            68.0619
## [61,]            68.3973
## [62,]            68.8198
## [63,]            69.2011
## [64,]            69.5546
## [65,]            69.9243
## [66,]            70.2258
## [67,]            70.5275
## [68,]            70.7720
## [69,]            71.0587
## [70,]            71.2636
## [71,]            70.6420
## [72,]            69.5611
## 
## $`Less developed regions, excluding China`
## Time Series:
## Start = 1950 
## End = 2021 
## Frequency = 1 
##       period_life_expect
##  [1,]            40.2970
##  [2,]            41.1126
##  [3,]            42.3175
##  [4,]            42.9239
##  [5,]            43.7917
##  [6,]            44.4349
##  [7,]            44.9830
##  [8,]            45.3547
##  [9,]            45.8433
## [10,]            46.5319
## [11,]            46.9654
## [12,]            47.4022
## [13,]            47.8435
## [14,]            48.2357
## [15,]            48.7273
## [16,]            48.0571
## [17,]            48.8081
## [18,]            49.3294
## [19,]            50.1222
## [20,]            50.5421
## [21,]            50.6060
## [22,]            49.8281
## [23,]            51.6489
## [24,]            52.2434
## [25,]            52.6560
## [26,]            52.9095
## [27,]            53.4155
## [28,]            54.2625
## [29,]            54.7306
## [30,]            55.2581
## [31,]            55.7042
## [32,]            56.1442
## [33,]            56.5427
## [34,]            56.7694
## [35,]            57.1622
## [36,]            57.5732
## [37,]            58.0727
## [38,]            58.4791
## [39,]            58.6052
## [40,]            59.2737
## [41,]            59.5398
## [42,]            59.7154
## [43,]            60.0337
## [44,]            60.3623
## [45,]            60.3318
## [46,]            60.9079
## [47,]            61.1326
## [48,]            61.4810
## [49,]            61.6547
## [50,]            62.2149
## [51,]            62.6002
## [52,]            62.9321
## [53,]            63.3150
## [54,]            63.6923
## [55,]            63.9489
## [56,]            64.4419
## [57,]            64.8884
## [58,]            65.2284
## [59,]            65.5018
## [60,]            65.9783
## [61,]            66.3382
## [62,]            66.7941
## [63,]            67.1808
## [64,]            67.5765
## [65,]            67.9774
## [66,]            68.3150
## [67,]            68.6312
## [68,]            68.9489
## [69,]            69.2067
## [70,]            69.4320
## [71,]            68.5773
## [72,]            67.1767
## 
## $`Less developed regions, excluding least developed countries`
## Time Series:
## Start = 1950 
## End = 2021 
## Frequency = 1 
##       period_life_expect
##  [1,]            41.9655
##  [2,]            42.8733
##  [3,]            44.0838
##  [4,]            44.7391
##  [5,]            45.5911
##  [6,]            46.1583
##  [7,]            46.7097
##  [8,]            47.2363
##  [9,]            47.7300
## [10,]            44.6174
## [11,]            42.3165
## [12,]            45.6754
## [13,]            49.5745
## [14,]            50.2328
## [15,]            50.8476
## [16,]            50.5967
## [17,]            51.4094
## [18,]            51.9605
## [19,]            52.8300
## [20,]            53.3884
## [21,]            53.9681
## [22,]            54.5085
## [23,]            55.0925
## [24,]            55.7514
## [25,]            56.3123
## [26,]            56.9397
## [27,]            57.4611
## [28,]            58.0116
## [29,]            58.5110
## [30,]            59.0747
## [31,]            59.5564
## [32,]            60.0603
## [33,]            60.5378
## [34,]            61.0239
## [35,]            61.5123
## [36,]            61.9414
## [37,]            62.3851
## [38,]            62.7887
## [39,]            63.1137
## [40,]            63.4971
## [41,]            63.7731
## [42,]            64.0083
## [43,]            64.3440
## [44,]            64.6419
## [45,]            64.8649
## [46,]            65.0858
## [47,]            65.3238
## [48,]            65.6215
## [49,]            65.9361
## [50,]            66.2149
## [51,]            66.5702
## [52,]            66.9624
## [53,]            67.3100
## [54,]            67.6354
## [55,]            67.8627
## [56,]            68.3035
## [57,]            68.7060
## [58,]            68.9963
## [59,]            69.2537
## [60,]            69.6261
## [61,]            69.9534
## [62,]            70.3006
## [63,]            70.6364
## [64,]            70.9670
## [65,]            71.3298
## [66,]            71.6204
## [67,]            71.8805
## [68,]            72.1041
## [69,]            72.3722
## [70,]            72.5593
## [71,]            71.9120
## [72,]            70.7380
## 
## $`More developed regions`
## Time Series:
## Start = 1950 
## End = 2021 
## Frequency = 1 
##       period_life_expect
##  [1,]            63.5226
##  [2,]            63.7474
##  [3,]            64.8906
##  [4,]            65.4510
##  [5,]            66.2719
##  [6,]            66.8312
##  [7,]            67.4525
##  [8,]            67.4201
##  [9,]            68.4923
## [10,]            68.5434
## [11,]            68.9788
## [12,]            69.3642
## [13,]            69.2322
## [14,]            69.5071
## [15,]            70.0578
## [16,]            70.0328
## [17,]            70.2362
## [18,]            70.3486
## [19,]            70.2421
## [20,]            70.1420
## [21,]            70.4366
## [22,]            70.6864
## [23,]            70.9003
## [24,]            71.0107
## [25,]            71.3220
## [26,]            71.4104
## [27,]            71.5788
## [28,]            71.8867
## [29,]            71.9699
## [30,]            72.1535
## [31,]            72.1446
## [32,]            72.4718
## [33,]            72.7917
## [34,]            72.8019
## [35,]            72.9535
## [36,]            73.0721
## [37,]            73.6632
## [38,]            73.8639
## [39,]            73.9420
## [40,]            74.1252
## [41,]            74.1723
## [42,]            74.2353
## [43,]            74.1841
## [44,]            73.7605
## [45,]            73.8473
## [46,]            73.9420
## [47,]            74.4507
## [48,]            74.8981
## [49,]            75.1678
## [50,]            75.0479
## [51,]            75.2499
## [52,]            75.4903
## [53,]            75.5222
## [54,]            75.6161
## [55,]            76.1057
## [56,]            76.1876
## [57,]            76.7299
## [58,]            77.0591
## [59,]            77.2959
## [60,]            77.7106
## [61,]            77.9313
## [62,]            78.2741
## [63,]            78.4618
## [64,]            78.7052
## [65,]            78.9622
## [66,]            78.9890
## [67,]            79.2764
## [68,]            79.4257
## [69,]            79.5600
## [70,]            79.8152
## [71,]            78.5756
## [72,]            78.0650
## 
## $`Small Island Developing States (SIDS)`
## Time Series:
## Start = 1950 
## End = 2021 
## Frequency = 1 
##       period_life_expect
##  [1,]            48.8265
##  [2,]            48.9768
##  [3,]            49.9972
##  [4,]            50.7508
##  [5,]            51.5593
##  [6,]            52.3511
##  [7,]            52.9864
##  [8,]            53.5744
##  [9,]            54.1417
## [10,]            54.6788
## [11,]            55.2597
## [12,]            55.7218
## [13,]            56.2235
## [14,]            56.1992
## [15,]            57.0916
## [16,]            57.3051
## [17,]            57.9652
## [18,]            58.3952
## [19,]            58.7534
## [20,]            59.1650
## [21,]            59.6021
## [22,]            59.9933
## [23,]            60.3638
## [24,]            60.7716
## [25,]            61.1315
## [26,]            60.8586
## [27,]            61.1914
## [28,]            61.3368
## [29,]            61.4078
## [30,]            61.6260
## [31,]            62.2545
## [32,]            62.5757
## [33,]            62.9408
## [34,]            63.2342
## [35,]            63.5745
## [36,]            63.8578
## [37,]            64.1750
## [38,]            64.5565
## [39,]            64.8516
## [40,]            65.3362
## [41,]            65.5723
## [42,]            65.7896
## [43,]            66.0467
## [44,]            66.2993
## [45,]            66.4531
## [46,]            66.7833
## [47,]            67.0530
## [48,]            67.2988
## [49,]            67.4484
## [50,]            67.7879
## [51,]            68.4526
## [52,]            68.6337
## [53,]            68.8566
## [54,]            69.0807
## [55,]            69.0064
## [56,]            69.5725
## [57,]            69.8160
## [58,]            70.0658
## [59,]            70.3484
## [60,]            70.6697
## [61,]            67.1929
## [62,]            71.0957
## [63,]            71.3962
## [64,]            71.5950
## [65,]            71.7563
## [66,]            71.9234
## [67,]            72.0044
## [68,]            72.1283
## [69,]            72.2182
## [70,]            72.3776
## [71,]            72.1998
## [72,]            70.8139

Fit best ARIMA model

# install.packages("forecast")
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.1
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# Fit one automatically selected ARIMA model per development-status group.
# `setNames(nm = ...)` makes lapply() return a list keyed by group name, so the
# result can still be indexed as le_dev_opt$`<group name>`.
le_dev_opt <- lapply(
  setNames(nm = as.character(development_status)),
  function(dev) auto.arima(le_dev_ls[[dev]])
)

# Forecast `h` steps ahead from the fitted model of one group and plot it.
#
# group: name of an element of `le_dev_opt`.
# h:     forecast horizon in time steps (the series are annual, so years).
#
# Returns the ggplot object produced by autoplot(), with the group name added
# as subtitle and explicit axis labels.
# NOTE(review): autoplot()'s default labelling is not expected to say which
# group a panel shows, hence the explicit labels — confirm against the
# rendered figure.
plot_le_forecast <- function(group, h = 5) {
  stopifnot(group %in% names(le_dev_opt))
  le_dev_opt[[group]] %>%
    forecast(h = h) %>%
    autoplot() +
    labs(
      subtitle = group,
      x = "Year",
      y = "Period life expectancy at birth"
    )
}

p1 <- plot_le_forecast("Least developed countries")
p2 <- plot_le_forecast("More developed regions")
p3 <- plot_le_forecast(
  "Less developed regions, excluding least developed countries"
)
p4 <- plot_le_forecast("Land-locked Developing Countries (LLDC)")
p5 <- plot_le_forecast("Small Island Developing States (SIDS)")

# display plots side by side
gridExtra::grid.arrange(p1, p2, p3, p4, p5, ncol = 2)

ADF test

library(tseries)
## Warning: package 'tseries' was built under R version 4.4.1
# Keep the identifier columns and the `diff` column, restrict the rows to the
# development-status aggregates, and discard rows with any missing value.
le_dev <- le_clean %>%
  select(entity, year, diff) %>%
  filter(entity %in% development_status) %>%
  drop_na()

# Augmented Dickey-Fuller test on the `diff` series of one group.
adf.test(
  le_dev %>%
    filter(entity == "More developed regions") %>%
    pull(diff)
)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  le_dev %>% filter(entity == "More developed regions") %>% pull(diff)
## Dickey-Fuller = -2.5177, Lag order = 4, p-value = 0.3646
## alternative hypothesis: stationary

Global Vaccination Coverage

# Load the vaccination coverage CSV from the project's data directory.
vaccination_df <- read_csv(file = "data/global-vaccination-coverage.csv")
## Rows: 7897 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): Entity, Code
## dbl (12): Year, BCG (% of one-year-olds immunized), HepB3 (% of one-year-old...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect column names, types and the first few values of each column.
glimpse(vaccination_df)
## Rows: 7,897
## Columns: 14
## $ Entity                                 <chr> "Afghanistan", "Afghanistan", "…
## $ Code                                   <chr> "AFG", "AFG", "AFG", "AFG", "AF…
## $ Year                                   <dbl> 1982, 1983, 1984, 1985, 1986, 1…
## $ `BCG (% of one-year-olds immunized)`   <dbl> 10, 10, 11, 17, 18, 27, 40, 38,…
## $ `HepB3 (% of one-year-olds immunized)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `Hib3 (% of one-year-olds immunized)`  <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `IPV1 (% of one-year-olds immunized)`  <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `MCV1 (% of one-year-olds immunized)`  <dbl> 8, 9, 14, 14, 14, 31, 34, 22, 2…
## $ `PCV3 (% of one-year-olds immunized)`  <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `Pol3 (% of one-year-olds immunized)`  <dbl> 5, 5, 16, 15, 11, 25, 35, 33, 2…
## $ `RCV1 (% of one-year-olds immunized)`  <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `RotaC (% of one-year-olds immunized)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `YFV (% of one-year-olds immunized)`   <dbl> NA, NA, NA, NA, NA, NA, NA, NA,…
## $ `DTP3 (% of one-year-olds immunized)`  <dbl> 5, 5, 16, 15, 11, 25, 35, 33, 2…
# Normalise the column names so they can be referenced without backticks.
vaccination_clean <- vaccination_df %>%
  janitor::clean_names()

# Reshape to long form: the identifier columns are kept as they are, and every
# other column contributes one (vaccine, coverage) pair per row.
vaccination_long <- pivot_longer(
  vaccination_clean,
  cols = !c(entity, code, year),
  names_to = "vaccine",
  values_to = "coverage"
)

# Visual overview of missing values in the long table.
visdat::vis_miss(vaccination_long)

# Distinct entity names that contain "WHO".
WHO_regions <- unique(stringr::str_subset(vaccination_df$Entity, "WHO"))

# Coverage over time for those entities: one facet per entity, one coloured
# line (with point markers) per vaccine.
p_vac <- ggplot(
  data = filter(vaccination_long, entity %in% WHO_regions),
  mapping = aes(x = year, y = coverage, color = vaccine)
) +
  geom_line() +
  geom_point() +
  facet_wrap(vars(entity)) +
  labs(
    title = "Global Vaccination Coverage by Year",
    x = "Year",
    y = "Coverage (%)"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p_vac)